/*

	This file prepares the regression sample at the firm-product-year level.

*/

cd ${work}5_reg_analysis

* Load the base sample (observations with year>2002) and build a numeric
* copy of the firm identifier, which is used as a merge key below.
use input\sample_ipt if year>2002 , clear
destring jurnr, generate(jurnr_num)

*MERGE ON OUTCOMES FROM DE LOECKER ET AL. PROCEDURE
* Only observations present in both files are kept.
merge 1:1 jurnr_num vnr unit year using input\vars_ipt , ///
	keep(match) keepusing(sales log_quantity log_mu log_mc rho exit_x cn2) nogenerate

*EXCLUDE SMALL FLOWS
* NOTE(review): Stata orders missing values above every number, so rows with
* missing sales survive this filter -- confirm that this is intended.
drop if sales<=7.5

*EXCLUDE OUTLIERS OF ESTIMATED MARKUPS (FOLLOWING DE LOECKER ET AL.)
* Rows with missing exit_x are retained (a missing value is not <=0).
drop if exit_x<=0
drop exit_x
* Trim (set to missing) log markups outside the 3rd-97th percentiles, then
* drop every row whose log markup is missing.
winsor2 log_mu, cuts(3 97) trim replace
drop if log_mu==.

*EXPORT SUPPORT
* Attach the firm-product-year promotion variables, then the firm-year ones.
* Unmatched sample rows are kept; rows found only in the using files are not.
merge m:1 jurnr_num vnr year using input\promotion_ipt, ///
	keep(master match) keepusing(treated_1 treated_2 tdcinitiative_2) nogenerate
merge m:1 jurnr_num year using input\promotion_it, ///
	keep(master match) keepusing(itemvalue_1 discount_1 itemvalue_lag discount_lag) nogenerate

* Missing values in these variables are recoded to zero.
* The lagged amounts are not recoded here.
foreach supportvar of varlist treated_1 treated_2 tdcinitiative_2 itemvalue_1 discount_1 {
	replace `supportvar' = 0 if `supportvar'==.
}

* The numeric firm id was only needed as a merge key.
drop jurnr_num

*OUTCOMES
* Log price: log of sales/1e3 minus log quantity.
gen log_price = ln(sales /1e3) - log_quantity
* Quality measure built from log price, log quantity and the markup exp(log_mu).
gen log_quality = exp(log_mu)*log_price + (exp(log_mu)-1)*log_quantity

*CREATE PRODUCT-LEVEL SUPPORT EXPENDITURES
* Sum of treated_1 within each firm-year; used to split the firm-level
* amounts across the firm's rows in proportion to treated_1.
tempvar n_treated
by jurnr year, sort : egen `n_treated' = total(treated_1)

* Net support (item value minus discount) allocated to the row. A zero
* denominator yields a missing value, which is recoded to zero.
gen itemvalue_ipt = (itemvalue_1 - discount_1) * treated_1/`n_treated'
replace itemvalue_ipt = 0 if itemvalue_ipt==.

* Apply the same allocation to each current and lagged firm-level amount.
foreach amount of varlist itemvalue_1 discount_1 itemvalue_lag discount_lag {
	replace `amount' = `amount' * treated_1/`n_treated'
	replace `amount' = 0 if `amount'==.
}
drop treated_1 `n_treated'

*PROFIT MEASURES
* Variable profits (1 - 1/markup) * sales, then net of allocated support.
* The intermediate is stored before subtracting, as in a two-step computation.
tempvar gross_var_profits
gen `gross_var_profits' = (1-1/exp(log_mu)) * sales
gen var_profits_nos = `gross_var_profits' - itemvalue_ipt
drop `gross_var_profits'

*SCALE DOWN EXPENDITURES AND WAGE BILL BY THE RATIO OF TOTAL PRODUCT SALES TO TOTAL FIRM REVENUE
* Every sample row must find a firm-year match (asserted); firm-years that
* appear only in the using file are discarded.
merge m:1 jurnr year using input\fire_it, ///
	assert(using match) keep(match) keepusing(revenue wagebill expenditures) nogenerate

* Total sales over all of the firm's rows in the year.
by jurnr year, sort : egen product_sales = total(sales)

* Share of firm revenue covered by the product data, capped at 1.
* min() ignores missing arguments, so a missing ratio (missing or zero
* revenue) also becomes 1 -- the same outcome as testing ">1" on a missing.
* NOTE(review): confirm that a share of 1 is the intended fallback there.
gen product_share = min(product_sales / revenue, 1)

* Sales net of the rho-weighted scaled firm costs and of allocated support.
gen profits = sales - rho*product_share*(wagebill+expenditures) - itemvalue_ipt

drop product_sales product_share revenue wagebill expenditures rho sales itemvalue_ipt

*LAGGED EXPORT CONTROLS
* Builds the number of export destinations per firm-product-year, shifts it
* forward by one year so that it merges as a lag, and derives the controls.
* A Stata-managed tempfile replaces a fixed scratch file on disk: it cannot
* clobber an existing file and is removed automatically, even if the
* do-file stops with an error part-way through.
tempfile lagged_dest
preserve
	use input\export_raw_ipdt, clear

	* Map the product code to the harmonised code used as vnr in the sample.
	* force: non-numeric codes become missing and are dropped by keep(3).
	destring vare, gen(cn8) force
	merge m:1 cn8 year using input\cn8_cn8plus_2002_2015, keepusing(cn8plus) keep(3) nogen
	rename cn8plus vnr

	* First collapse: one row per firm-product-destination-year.
	* Second collapse: count those rows, i.e. the number of destinations.
	collapse (sum) vrd , by(jurnr vnr land year)
	collapse (count) dest=vrd , by(jurnr vnr year)

	* Shift forward one year so that year t carries the t-1 count.
	replace year=year+1
	save `lagged_dest', replace
restore
merge m:1 jurnr vnr year using `lagged_dest', keep(1 3) keepusing(dest) nogen

* No match in the previous year's export data means zero destinations.
replace dest=0 if dest==.
gen dest_sq=dest^2
gen exp=(dest>0)

*LAGGED SALES
* Adds firm-level sales of the previous year as the variable sales.
* A Stata-managed tempfile replaces a fixed scratch file on disk: it cannot
* clobber an existing file and is removed automatically, even if the
* do-file stops with an error part-way through.
tempfile lagged_sales
preserve
	use input\firm_it, clear
	keep jurnr year sales
	replace sales=sales/1e6
	* deflate is a user-written command defined outside this file; it is
	* called before the year shift, so it sees the original year.
	deflate sales
	* Shift forward one year so that year t carries the t-1 value.
	replace year=year+1
	save `lagged_sales', replace
restore
* Every sample row must have a lagged-sales match (asserted); firm-years
* found only in the using file are discarded.
merge m:1 jurnr year using `lagged_sales', assert(2 3) keep(3) keepusing(sales) nogen

*IHS
* Inverse hyperbolic sine of each profit measure, after dividing by 1e3 and
* deflating (deflate is a user-written command defined outside this file).
* The transform uses the odd-symmetric form
*     ihs(x) = sign(x) * log(|x| + sqrt(x^2 + 1))
* which is mathematically identical to log(x + sqrt(x^2 + 1)) but avoids the
* catastrophic cancellation that form suffers for large negative x, where
* x + sqrt(x^2 + 1) is a difference of nearly equal numbers and can even
* evaluate to log(0), i.e. missing. Profits can be negative, so this matters.
foreach v of varlist var_profits_nos profits {
	gen double `v'_temp = `v'/1e3
	deflate `v'_temp
	gen ihs_`v' = sign(`v'_temp)*log(abs(`v'_temp)+sqrt(`v'_temp^2+1))
	drop `v'_temp
}

*WINSORIZE
* Markups and profit measures: 1st/99th percentile over the full sample.
winsor2 log_mu ihs_var_profits_nos ihs_profits , ///
	cuts(1 99) replace
* Cost, quantity, price and quality: 1st/99th percentile within each unit.
winsor2 log_mc log_quantity log_price log_quality , ///
	cuts(1 99) by(unit) replace

*LABELS
* Outcome variables
label variable log_quantity "ln(Quantity)"
label variable log_price "ln(Price)"
label variable log_mc "ln(Marginal Cost)"
label variable log_mu "ln(Markup)"
label variable log_quality "ln(Quality)"
label variable ihs_var_profits_nos "ihs(Variable profits net of support)"
label variable ihs_profits "ihs(Sales net of costs)"

* Lagged export controls
label variable exp "Export Status"
label variable dest "\#Destinations"
label variable dest_sq "\#Destinations sq."

*FEs
* Each fixed-effect identifier is the group index of the listed variables.
local fe_IY "cn2 year"
local fe_FY "jurnr year"
local fe_FP "jurnr vnr unit"
local fe_PY "vnr unit year"
local fe_F  "jurnr"
* Created in this order so the variable order of the saved file is fixed.
foreach fe in IY FY FP PY F {
	egen `fe' = group(`fe_`fe'')
}

*SAVE
* Shrink storage types where lossless, then write the regression sample.
compress
save temp\regsample_ipt, replace